// Project folders. Paths written as "$data_folder\..." resolve against the
// value set here, so relocating the project only requires editing these lines.
// log_folder and result_folder are defined but not referenced in this part of
// the script.
global data_folder "W:\intimate\data"
global log_folder "W:\intimate\dofiles\logs"
global result_folder "W:\intimate\results"

// Load the starting dataset through the global rather than a second
// hard-coded copy of the data path.
use "$data_folder\allwomen_data_clean_withDV", clear

// cleaning
// Keep every observation with dv_couple==1, plus dv_couple==0 observations
// that have zero victim_events and zero suspect_events. Anything else
// (including missing dv_couple) is dropped.
keep if dv_couple==1 | (dv_couple==0 & victim_events==0 & suspect_events==0)

// Top-code nchildB1 at 5. Stata treats missing as larger than any number, so
// a bare ">=5" would also turn missing counts into 5; the "<." bound leaves
// missing values untouched.
replace nchildB1=5 if nchildB1>=5 & nchildB1<.

// Indicators equal to 1 when the language code is "fi", 0 otherwise.
gen language=(kieli=="fi")
gen spouse_language=(spouse_kieli=="fi")

// merge crime
// year_event is the year before cohabitation started; it is the time key for
// both merges in this block.
g year_event = year_start_cohab-1

// Own cumulative crimes at year_event. Rows without a record in the crime
// file get 0. No keepusing() here, so every variable in the crime file is
// brought in; sphnro is dropped again straight away.
merge 1:1 shnro year_event using "$data_folder\cumul_crime_violent"
drop if _merge==2
replace cum_crimes = 0 if _merge==1
drop _merge sphnro
ren cum_crimes pre_crimes

// Same lookup for the spouse: copy the spouse id (sphnro0) into the key name
// the crime file is merged on, then drop the temporary key.
// NOTE(review): merge 1:1 requires each sphnro/year_event pair to be unique in
// memory; confirm that a spouse id cannot repeat within a year.
g sphnro = sphnro0
merge 1:1 sphnro year_event using "$data_folder\cumul_crime_violent", keepusing(cum_crimes)
drop if _merge==2
replace cum_crimes = 0 if _merge==1
drop _merge sphnro
ren cum_crimes spouse_pre_crimes

// Top-code both counts at 5. The !missing() guard stops a missing count
// (which compares greater than 5 in Stata) from being recoded to 5.
foreach v of varlist pre_crimes spouse_pre_crimes {
    replace `v' = 5 if `v'>5 & !missing(`v')
}


// Education indicators from the education code: >=6 -> college,
// 3-5 -> high_school, <3 -> ho.
// Stata evaluates "x>=6" as true when x is missing, so without the
// !missing() guard every observation with unknown education would be coded
// as college. With the guard, unknown education gives 0 on all three
// indicators (deliberately 0 rather than missing, so that observations
// without spouse information are not lost from later estimation).
gen spouse_college=spouse_pre_educ>=6 & !missing(spouse_pre_educ)
gen spouse_high_school=spouse_pre_educ>=3 & spouse_pre_educ<=5
gen spouse_ho=spouse_pre_educ<3

gen college=educ>=6 & !missing(educ)
gen high_school=educ>=3 & educ<=5
gen ho=educ<3

// Attach covariates measured at year_event. The fertility, relationship
// history and graduation files are stored one per year, so the data are
// processed one year_event at a time, saved to a tempfile, and stacked again
// afterwards. Rows whose year_event is outside 2005-2015 are not carried
// forward, because only the eleven yearly tempfiles are appended.
forvalues y=2005/2015{
    preserve
    keep if year_event==`y'

    // merge age of youngest child (-1 marks "no record / missing")
    merge 1:1 shnro year_event using "$data_folder\fertility_`y'.dta", keepusing(age_yst)
    drop if _merge==2
    replace age_yst = -1 if _merge==1
    replace age_yst = -1 if age_yst==.
    drop _merge

    // merge relationship history (unmatched rows get zeros)
    merge 1:1 shnro year_event using "$data_folder\relationship_hist_`y'.dta", keepusing(relation_duration ndistinct avg_relation_year)
    drop if _merge==2
    replace ndistinct = 0 if _merge==1
    replace relation_duration = 0 if _merge==1
    replace avg_relation_year = 0 if _merge==1
    drop _merge

    // merge year since graduation (unmatched rows get 0)
    merge 1:1 shnro year_event using "$data_folder\year_since_graduation_`y'.dta", keepusing(yr_since_graduation)
    drop if _merge==2
    replace yr_since_graduation = 0 if _merge==1
    drop _merge

    // merge mental health prescriptions (unmatched rows get 0); this file is
    // not split by year, and is addressed through $data_folder like the others
    merge 1:1 shnro year_event using "$data_folder\prescriptions_depression_indlevel.dta", keepusing(depression)
    drop if _merge==2
    replace depression=0 if _merge==1
    drop _merge

    tempfile t`y'
    save `t`y'', replace
    restore
}

// Rebuild the working dataset from the yearly pieces.
clear
forvalues y=2005/2015{
    append using `t`y''
}

// split into coarsened exact matches (few merged due to tiny sample sizes)
// Cells are the cross of: own status (ptoim1B1 / tyotuB1 band), any children
// (nchildB1), spouse status (spouse_pre_ptoim1) and college. The conditions
// are kept in local macros so each assignment line only shows what varies.
// Rows that satisfy none of the conditions (e.g. missing spouse_pre_ptoim1)
// keep data_group missing.
local own_p0   "ptoim1B1==0"
local own_low  "ptoim1B1==1 & tyotuB1 < 20000"
local own_high "ptoim1B1==1 & tyotuB1 >= 20000"
local kids_no  "nchildB1==0"
local kids_yes "nchildB1>0"
local sp_p0    "spouse_pre_ptoim1==0"
local sp_p1    "spouse_pre_ptoim1==1"

g data_group = .

// ptoim1B1 == 0
replace data_group = 1  if `own_p0' & `kids_no'  & `sp_p0' & college==0
replace data_group = 2  if `own_p0' & `kids_no'  & `sp_p0' & college==1
replace data_group = 3  if `own_p0' & `kids_no'  & `sp_p1' & college==0
replace data_group = 4  if `own_p0' & `kids_no'  & `sp_p1' & college==1
// both college levels share cell 5
replace data_group = 5  if `own_p0' & `kids_yes' & `sp_p0' & inlist(college, 0, 1)
replace data_group = 6  if `own_p0' & `kids_yes' & `sp_p1' & college==0
replace data_group = 7  if `own_p0' & `kids_yes' & `sp_p1' & college==1

// ptoim1B1 == 1, tyotuB1 below 20000
replace data_group = 8  if `own_low' & `kids_no'  & `sp_p0' & college==0
replace data_group = 9  if `own_low' & `kids_no'  & `sp_p0' & college==1
replace data_group = 10 if `own_low' & `kids_no'  & `sp_p1' & college==0
replace data_group = 11 if `own_low' & `kids_no'  & `sp_p1' & college==1
// both college levels share cell 12
replace data_group = 12 if `own_low' & `kids_yes' & `sp_p0' & inlist(college, 0, 1)
replace data_group = 13 if `own_low' & `kids_yes' & `sp_p1' & college==0
replace data_group = 14 if `own_low' & `kids_yes' & `sp_p1' & college==1

// ptoim1B1 == 1, tyotuB1 at or above 20000
// NOTE(review): value 14 is reused here for a cell that differs from the one
// just above on income band, children, spouse status and college. The other
// pooled cells (5, 12) only pool across college. Confirm this pooling is
// intended and not a numbering slip; changing it would also change the
// number of groups looped over later.
replace data_group = 14 if `own_high' & `kids_no'  & `sp_p0' & college==0
replace data_group = 15 if `own_high' & `kids_no'  & `sp_p0' & college==1
replace data_group = 16 if `own_high' & `kids_no'  & `sp_p1' & college==0
replace data_group = 17 if `own_high' & `kids_no'  & `sp_p1' & college==1
replace data_group = 18 if `own_high' & `kids_yes' & `sp_p0' & college==0
replace data_group = 19 if `own_high' & `kids_yes' & `sp_p0' & college==1
replace data_group = 20 if `own_high' & `kids_yes' & `sp_p1' & college==0
replace data_group = 21 if `own_high' & `kids_yes' & `sp_p1' & college==1


// Fix the row order, then write one file per cell (match_data_group1 ...
// match_data_group21). Rows with missing data_group end up in no file.
sort dv_couple data_group shnro year_start_cohab

forvalues g = 1/21 {
    // preserve/restore brings the full dataset back after each subset is saved
    preserve
    keep if data_group == `g'
    save "$data_folder\match_data_group`g'", replace
    restore
}

// For each of the 21 cells: estimate a propensity score for dv_couple, find
// nearest neighbours on that score, and save the dv_couple==1 observations
// together with their matched observations, labelled by match_id, to
// match_complete_group`i'.
forvalues i = 1/21 {

    
	use "$data_folder\match_data_group`i'", clear
	
	// The seed is reset to the same value for every cell, and the data are
	// put in a fixed order before any random numbers are drawn.
	set seed 12345
	sort dv_couple data_group shnro year_start_cohab
	
	// For periods 1-4: quintile of tyotuB`s' within this cell, and an
	// indicator that is 1 when sphnroB`s' is non-missing.
	forvalues s = 1/4 {
		xtile inc_group`s' = tyotuB`s', nq(5)
		g cohabit`s' = 1 - missing(sphnroB`s')
	}
	// Bin continuous covariates: age in 5-unit bands; age_yst in 5-unit bands
	// with the -1 code moved to 100 so it forms its own top bin (20);
	// avg_relation_year in 3-unit bands.
	g age_group = floor(age/5)
	replace age_yst=100 if age_yst==-1
	g age_yst_group=floor(age_yst/5)
	// Top-code ndistinct at 3.
	// NOTE(review): ">3" is also true for missing values in Stata, so a
	// missing ndistinct would become 3 here - confirm none are missing.
	replace ndistinct=3 if ndistinct>3
	g avg_relation_group=floor(avg_relation_year/3)

	// Observations without spouse_pre_age are flagged, and their spouse
	// covariates are set to 0 so they are not lost to missing values in the
	// logit; i.spouse_missing is included in the model below.
	g spouse_missing = 0
	replace spouse_missing = 1 if missing(spouse_pre_age)
	xtile spouse_group1 = spouse_pre_tyotu, nq(5)
	replace spouse_group1 = 0 if spouse_missing==1
	g spouse_age = floor(spouse_pre_age/5)
	replace spouse_age = 0 if spouse_missing==1
	replace spouse_pre_ptoim1 = 0 if spouse_missing==1
	
	g yr_since_grad_group=floor(yr_since_graduation/5)

	// Propensity model for dv_couple, estimated separately within the cell.
	logit dv_couple i.age_group ho ///
				i.inc_group1 i.ptoim1B1 ///
				i.inc_group2 i.ptoim1B2 ///
				i.inc_group3 i.ptoim1B3 ///
				i.spouse_age spouse_college spouse_ho i.spouse_group1 i.nchildB1 ///
				i.spouse_missing i.year_start_cohab ///
				cohabit* ///
				i.inc_group4 i.ptoim1B4 ///
				i.age_yst_group i.ndistinct i.avg_relation_group i.yr_since_grad_group ///
				i.depression
				
	// Predicted probability; observations for which no prediction can be
	// formed are tabulated by dv_couple and then dropped.
	predict dv_prob, pr
	g missing_prob = missing(dv_prob)
	table missing_prob dv_couple
	drop if missing_prob==1
	
	// find 5 nearest neighbours 
	// match_n records the row number in the order that is in effect when
	// teffects runs; the match_ob* variables created by gen() are used below
	// as row numbers and are joined back on match_n.
	sort dv_couple data_group shnro year_start_cohab
	gen match_n = _n
	// rand_n is a uniform random draw used as the outcome variable; the
	// matching covariate is dv_prob and the treatment is dv_couple.
	// Presumably only the generated match indices are of interest, not the
	// estimated effect - confirm.
	gen rand_n = runiform()
	
	sort match_n
	teffects nnmatch (rand_n dv_prob) (dv_couple), ///
			gen(match_ob) nneighbor(5)
			
	// Build a long lookup (match_n -> match_id) from the dv_couple==1 rows:
	// match_id is the row number of the dv_couple==1 observation, match_n is
	// the row number of a member of its set (itself or one of match_ob1-5).
	preserve
		keep if dv_couple==1 
		keep match_n match_ob1 match_ob2 match_ob3 match_ob4 match_ob5
		tempfile idwide
		save `idwide'
		
		// One file per neighbour slot, reshaped so the neighbour's row
		// number sits in match_n.
		forvalues n = 1/5 {
			use `idwide', clear
			keep match_ob`n' match_n
			ren match_n match_id
			ren match_ob`n' match_n
			tempfile match`n'
			save `match`n''
		}
		// The dv_couple==1 observation itself belongs to its own set.
		use `idwide'
		keep match_n
		g match_id = match_n
		tempfile match0
		save `match0'
		
		use `match0', clear
		forvalues n = 1/5 {
			append using `match`n''
		}
		// Empty neighbour slots carry a missing row number; discard them.
		drop if missing(match_n)
		tempfile mergeids
		save `mergeids'
					
	restore
	
	// 1:m because the same row can be listed under several match_id values;
	// such a row is duplicated, once per set. Rows in no set are dropped.
	merge 1:m match_n using `mergeids'
	
	keep if _merge==3
	drop _merge 
	keep shnro year_start_cohab match_id
	
	// Re-attach the variables from the cell file as it was saved before the
	// recodes made in this loop.
	merge m:1 shnro year_start_cohab using "$data_folder\match_data_group`i'.dta"
	keep if _merge == 3
	drop _merge
	sort dv_couple data_group shnro year_start_cohab match_id
	save "$data_folder\match_complete_group`i'", replace
}


// Stack the 21 per-cell matched files. group records which file each row
// came from: rows from file 1 are tagged up front, and after each append the
// newly added rows are the only ones with group still missing.
use "$data_folder\match_complete_group1", clear
gen group=1
forvalues i = 2/21 {
    append using "$data_folder\match_complete_group`i'"
    replace group=`i' if group==.
}
tab dv_couple
sort shnro year_start_cohab match_id

// match_id was built from row numbers inside each cell, so the same value
// recurs across cells. Combine it with group to get an id that is unique
// over the stacked data.
rename match_id match_id_old
egen match_id=group(group match_id_old)
drop match_id_old

// Written through $data_folder, like the per-cell files above, instead of a
// separate hard-coded copy of the data path.
save "$data_folder\match_victim_data_clean_new", replace


